# Base image: Ubuntu 18.04 LTS (bionic), pinned by tag for reproducibility.
FROM ubuntu:18.04

LABEL maintainer="FredyVia <fredyvia@qq.com>"

# Build-time only: suppress interactive debconf prompts during apt installs.
# Declared as ARG (not ENV) so it applies to every RUN in this stage but does
# NOT leak into the runtime environment of containers built from this image.
ARG   DEBIAN_FRONTEND=noninteractive

# Runtime configuration for the init-daemon step scripts, plus the Spark
# download coordinates (BASE_URL intentionally ends with a trailing '/').
ENV   ENABLE_INIT_DAEMON=false \
      INIT_DAEMON_BASE_URI=http://identifier/init-daemon \
      INIT_DAEMON_STEP=spark_master_init \
      BASE_URL=https://mirrors.aliyun.com/apache/spark/ \
      SPARK_VERSION=2.4.8 \
      HADOOP_VERSION=2.7

# Helper scripts used by the init-daemon orchestration (made executable below).
COPY  wait-for-step.sh execute-step.sh finish-step.sh /

# Switch APT to the Aliyun mirror, fetch the Spark distribution, and clean up
# in the SAME layer so neither the tarball nor the apt lists persist in the image.
RUN   sed -i 's|http://archive.ubuntu.com/ubuntu/|http://mirrors.aliyun.com/ubuntu/|g' /etc/apt/sources.list \
      && apt-get update \
      # Give permission to execute the init-daemon helper scripts
      && chmod +x /wait-for-step.sh /execute-step.sh /finish-step.sh \
      # ca-certificates must be explicit: --no-install-recommends stops wget
      # from pulling it in, and the download below is over HTTPS
      && apt-get install -y --no-install-recommends ca-certificates wget \
      # BASE_URL already ends with '/', so no extra slash is inserted here
      && wget "${BASE_URL}spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" \
      && tar -xzf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \
      && mv spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} spark \
      && rm spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \
      && rm -rf /var/lib/apt/lists/*

# Runtime dependencies: Python 2 + pip for the PySpark bindings, and a JRE 8
# for Spark itself. apt-get update is repeated in this layer so the install
# never reads a stale package index cached by an earlier layer (DL3009).
# NOTE(review): vim is a debugging convenience only — consider dropping it to
# slim the image. (A python3 symlink was previously considered here:
# `ln -s /usr/bin/python3 /usr/bin/python`.)
RUN   apt-get update \
      && apt-get install -y --no-install-recommends python python-pip vim openjdk-8-jre \
      && rm -rf /var/lib/apt/lists/*

# Python libraries from the Aliyun PyPI mirror over HTTPS (TLS-verified, so
# --trusted-host is no longer needed). pyspark is pinned to the same version
# as the Spark distribution unpacked above to keep driver/worker in sync.
# The redundant `chmod +x *.sh` was dropped — the scripts are already made
# executable when they are installed.
RUN   pip2 install --no-cache-dir -i https://mirrors.aliyun.com/pypi/simple/ \
      numpy pyspark==${SPARK_VERSION} pymongo
      
# Fix the value of PYTHONHASHSEED so hash-based operations are reproducible
# across Spark's Python worker processes.
# Note: this is needed when you use Python 3.3 or greater.
# Uses the key=value form; the space-separated form is deprecated.
ENV PYTHONHASHSEED=1
